nlp :: NLPSentence :: defaultinit
# A single sentence of a `Document`, together with the tokens it holds.
class NLPSentence

	# Position of this sentence in the input text.
	var index: Int

	# The `NLPToken`s that make up `self`.
	var tokens = new Array[NLPToken]

	# Build a sentence from its XML element.
	#
	# The index is read from the first attribute of `xml`, which must be a
	# string attribute. Each `token` start tag found under the first `tokens`
	# child of `xml` is converted to a `NLPToken` and appended to `tokens`;
	# any other entity returned for `token` only produces a printed warning.
	#
	# ~~~
	# var xml = """
	# <sentence id="1">
	# <tokens>
	# <token id="1">
	# <word>Stanford</word>
	# <lemma>Stanford</lemma>
	# <CharacterOffsetBegin>0</CharacterOffsetBegin>
	# <CharacterOffsetEnd>8</CharacterOffsetEnd>
	# <POS>NNP</POS>
	# </token>
	# <token id="2">
	# <word>University</word>
	# <lemma>University</lemma>
	# <CharacterOffsetBegin>9</CharacterOffsetBegin>
	# <CharacterOffsetEnd>19</CharacterOffsetEnd>
	# <POS>NNP</POS>
	# </token>
	# </tokens>
	# </sentence>""".to_xml["sentence"].first.as(XMLStartTag)
	#
	# var sentence = new NLPSentence.from_xml(xml)
	# assert sentence.index == 1
	# assert sentence.tokens.length == 2
	# ~~~
	init from_xml(xml: XMLStartTag) do
		# The sentence number is carried by the first attribute of the tag.
		var first_attr = xml.attributes.first
		var sentence_index = first_attr.as(XMLStringAttr).value.to_i

		# Only the first `tokens` child is inspected.
		var tokens_tag = xml["tokens"].first
		for node in tokens_tag["token"] do
			# Anything that is not a start tag cannot be parsed as a token.
			if not node isa XMLStartTag then
				print "Warning: malformed xml, `tokens` is supposed to contain `token` tags"
				continue
			end
			tokens.add(new NLPToken.from_xml(node))
		end

		# Delegate to the default constructor to set `index`.
		init(sentence_index)
	end
end
lib/nlp/stanford.nit:190,1--237,3