defaultinit

Property definitions

nlp $ NLPDocument :: defaultinit

# An `NLPDocument` represents a text input given to the NLP processor.
#
# Once processed, it contains a list of sentences that contain tokens.
class NLPDocument

	# `NLPSentence`s contained in `self`, in the order they were added.
	var sentences = new Array[NLPSentence]

	# Init `self` from an xml element.
	#
	# Every `sentence` child found under `root/document/sentences` is turned
	# into a `NLPSentence` and appended to `sentences`.
	# A `sentence` child that is not a `XMLStartTag` is skipped and a warning
	# is printed instead.
	#
	# ~~~
	# var xml = """
	# <root>
	#   <document>
	#     <sentences>
	#       <sentence id="1">
	#         <tokens>
	#           <token id="1">
	#             <word>Stanford</word>
	#             <lemma>Stanford</lemma>
	#             <CharacterOffsetBegin>0</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>8</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#           <token id="2">
	#             <word>University</word>
	#             <lemma>University</lemma>
	#             <CharacterOffsetBegin>9</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>19</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#         </tokens>
	#       </sentence>
	#       <sentence id="2">
	#         <tokens>
	#           <token id="1">
	#             <word>UQAM</word>
	#             <lemma>UQAM</lemma>
	#             <CharacterOffsetBegin>0</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>4</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#           <token id="2">
	#             <word>University</word>
	#             <lemma>University</lemma>
	#             <CharacterOffsetBegin>5</CharacterOffsetBegin>
	#             <CharacterOffsetEnd>15</CharacterOffsetEnd>
	#             <POS>NNP</POS>
	#           </token>
	#         </tokens>
	#       </sentence>
	#     </sentences>
	#   </document>
	# </root>""".to_xml.as(XMLDocument)
	#
	# var document = new NLPDocument.from_xml(xml)
	# assert document.sentences.length == 2
	# assert document.sentences.first.tokens.first.word == "Stanford"
	# assert document.sentences.last.tokens.first.word == "UQAM"
	# ~~~
	init from_xml(xml: XMLDocument) do
		# Walk down root > document > sentences and visit each `sentence` child.
		for obj in xml["root"].first["document"].first["sentences"].first["sentence"] do
			if obj isa XMLStartTag then
				sentences.add new NLPSentence.from_xml(obj)
			else
				# Not a start tag: report it and keep going with the next child.
				print "Warning: malformed xml, `sentences` is supposed to contain `sentence` tags"
			end
		end
	end

	# Init `self` from a XML file.
	#
	# The file at `path` is read line by line, its first two lines are dropped,
	# and the remaining lines are parsed as XML and handed to `from_xml`.
	#
	# NOTE(review): the first two lines are removed unconditionally; this
	# assumes the file starts with an xml doctype line followed by an xslt
	# link line -- confirm against the files actually produced by the processor.
	init from_xml_file(path: String) do
		var file = new FileReader.open(path)
		var xml = file.read_lines
		file.close
		xml.shift # remove xml doctype
		xml.shift # remove xslt link
		from_xml(xml.join("\n").to_xml.as(XMLDocument))
	end
end
lib/nlp/stanford.nit:109,1--188,3
init defaultinit

Summary

Property definitions

nlp$NLPDocument$defaultinit

Property definitions

nlp $ NLPDocument :: defaultinit