A `Document` represents a text input given to the NLP processor.

# A `Document` represents a text input given to the NLP processor.
#
# Once processed, it contains a list of sentences that contain tokens.
class NLPDocument

	# `NLPSentence`s contained in `self`
	var sentences = new Array[NLPSentence]

	# Init `self` from an xml element.
	#
	# Every `sentence` element found under the first `root/document/sentences`
	# path of `xml` is converted with `NLPSentence.from_xml` and appended to
	# `sentences`. Children that are not start tags are skipped and a warning
	# is printed for each of them.
	#
	# ~~~
	# var xml = """
	# <root>
	#   <document>
	#     <sentences>
	#       <sentence id="1">
	#         <tokens>
	#           <token id="1">
	#             <word>Stanford</word>
	#             <lemma>Stanford</lemma>
	#             <CharacterOffsetBegin>0</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>8</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#           <token id="2">
	#             <word>University</word>
	#             <lemma>University</lemma>
	#             <CharacterOffsetBegin>9</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>19</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#         </tokens>
	#       </sentence>
	#       <sentence id="2">
	#         <tokens>
	#           <token id="1">
	#             <word>UQAM</word>
	#             <lemma>UQAM</lemma>
	#             <CharacterOffsetBegin>0</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>4</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#           <token id="2">
	#             <word>University</word>
	#             <lemma>University</lemma>
	#             <CharacterOffsetBegin>5</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>15</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#         </tokens>
	#       </sentence>
	#     </sentences>
	#   </document>
	# </root>""".to_xml.as(XMLDocument)
	#
	# var document = new NLPDocument.from_xml(xml)
	# assert document.sentences.length == 2
	# assert document.sentences.first.tokens.first.word == "Stanford"
	# assert document.sentences.last.tokens.first.word == "UQAM"
	# ~~~
	init from_xml(xml: XMLDocument) do
		# Only the first `root`, `document` and `sentences` elements are visited.
		for obj in xml["root"].first["document"].first["sentences"].first["sentence"] do
			if obj isa XMLStartTag then
				sentences.add new NLPSentence.from_xml(obj)
			else
				# Report the unexpected node and keep going with the next one.
				print "Warning: malformed xml, `sentences` is supposed to contain `sentence` tags"
			end
		end
	end

	# Init `self` from a XML file.
	#
	# The file at `path` is read line by line and its first two lines are
	# discarded before the remaining content is parsed and passed to `from_xml`.
	init from_xml_file(path: String) do
		var file = new FileReader.open(path)
		var xml = file.read_lines
		file.close
		xml.shift # remove xml doctype
		xml.shift # remove xslt link
		# The parse result is cast to `XMLDocument` before being handed over.
		from_xml(xml.join("\n").to_xml.as(XMLDocument))
	end
end

lib/nlp/stanford.nit:109,1--188,3

class NLPDocument

Summary

A Document represents a text input given to the NLP processor.

Introduced properties

defaultinit

from_xml

from_xml_file

sentences

sentences=

Redefined properties

SELF

A Document represents a text input given to the NLP processor.

Introduced properties

init defaultinit

init from_xml(xml: XMLDocument)

init from_xml_file(path: String)

fun sentences: Array[NLPSentence]

protected fun sentences=(sentences: Array[NLPSentence])

Redefined properties

redef type SELF: NLPDocument

Summary

All properties

!=

==

CLASS

SELF

class_factory

class_name

defaultinit

defaultinit

from_xml

from_xml_file

get_class

hash

init

inspect

inspect_head

is_same_instance

is_same_serialized

is_same_type

object_id

output

output_class_name

sentences

sentences=

serialization_hash

sys

to_jvalue

to_s

All properties

fun !=(other: nullable Object): Bool

fun ==(other: nullable Object): Bool

type CLASS: Class[SELF]

type SELF: Object

protected fun class_factory(name: String): CLASS

fun class_name: String

init defaultinit

init defaultinit

init from_xml(xml: XMLDocument)

init from_xml_file(path: String)

fun get_class: CLASS

fun hash: Int

init init

fun inspect: String

protected fun inspect_head: String

intern fun is_same_instance(other: nullable Object): Bool

fun is_same_serialized(other: nullable Object): Bool

intern fun is_same_type(other: Object): Bool

intern fun object_id: Int

fun output

intern fun output_class_name

fun sentences: Array[NLPSentence]

protected fun sentences=(sentences: Array[NLPSentence])

fun serialization_hash: Int

intern fun sys: Sys

abstract fun to_jvalue(env: JniEnv): JValue

fun to_s: String

Summary

Parents

Object

A `Document` represents a text input given to the NLP processor.

A `Document` represents a text input given to the NLP processor.