nlp :: NLPDocument :: from_xml
Init `self` from an xml element.

var xml = """
<root>
<document>
<sentences>
<sentence id="1">
<tokens>
<token id="1">
<word>Stanford</word>
<lemma>Stanford</lemma>
<CharacterOffsetBegin>0</CharacterOffsetBegin>
<CharacterOffsetEnd>8</CharacterOffsetEnd>
<POS>NNP</POS>
</token>
<token id="2">
<word>University</word>
<lemma>University</lemma>
<CharacterOffsetBegin>9</CharacterOffsetBegin>
<CharacterOffsetEnd>19</CharacterOffsetEnd>
<POS>NNP</POS>
</token>
</tokens>
</sentence>
<sentence id="2">
<tokens>
<token id="1">
<word>UQAM</word>
<lemma>UQAM</lemma>
<CharacterOffsetBegin>0</CharacterOffsetBegin>
<CharacterOffsetEnd>4</CharacterOffsetEnd>
<POS>NNP</POS>
</token>
<token id="2">
<word>University</word>
<lemma>University</lemma>
<CharacterOffsetBegin>5</CharacterOffsetBegin>
<CharacterOffsetEnd>15</CharacterOffsetEnd>
<POS>NNP</POS>
</token>
</tokens>
</sentence>
</sentences>
</document>
</root>""".to_xml.as(XMLDocument)
var document = new NLPDocument.from_xml(xml)
assert document.sentences.length == 2
assert document.sentences.first.tokens.first.word == "Stanford"
assert document.sentences.last.tokens.first.word == "UQAM"
# Init `self` from an xml element.
#
# ~~~
# var xml = """
# <root>
# <document>
# <sentences>
# <sentence id="1">
# <tokens>
# <token id="1">
# <word>Stanford</word>
# <lemma>Stanford</lemma>
# <CharacterOffsetBegin>0</CharacterOffsetBegin>
# <CharacterOffsetEnd>8</CharacterOffsetEnd>
# <POS>NNP</POS>
# </token>
# <token id="2">
# <word>University</word>
# <lemma>University</lemma>
# <CharacterOffsetBegin>9</CharacterOffsetBegin>
# <CharacterOffsetEnd>19</CharacterOffsetEnd>
# <POS>NNP</POS>
# </token>
# </tokens>
# </sentence>
# <sentence id="2">
# <tokens>
# <token id="1">
# <word>UQAM</word>
# <lemma>UQAM</lemma>
# <CharacterOffsetBegin>0</CharacterOffsetBegin>
# <CharacterOffsetEnd>4</CharacterOffsetEnd>
# <POS>NNP</POS>
# </token>
# <token id="2">
# <word>University</word>
# <lemma>University</lemma>
# <CharacterOffsetBegin>5</CharacterOffsetBegin>
# <CharacterOffsetEnd>15</CharacterOffsetEnd>
# <POS>NNP</POS>
# </token>
# </tokens>
# </sentence>
# </sentences>
# </document>
# </root>""".to_xml.as(XMLDocument)
#
# var document = new NLPDocument.from_xml(xml)
# assert document.sentences.length == 2
# assert document.sentences.first.tokens.first.word == "Stanford"
# assert document.sentences.last.tokens.first.word == "UQAM"
# ~~~
init from_xml(xml: XMLDocument) do
	# Walk `root` -> `document` -> `sentences`, then visit each `sentence` child.
	# NOTE(review): each `.first` presumably expects the parent tag to be
	# present in `xml` - confirm how a missing tag should be handled.
	for obj in xml["root"].first["document"].first["sentences"].first["sentence"] do
		if obj isa XMLStartTag then
			# Delegate parsing of the sentence content to `NLPSentence`.
			sentences.add new NLPSentence.from_xml(obj)
		else
			# Anything that is not a start tag is reported and skipped,
			# so parsing continues with the next entry.
			print "Warning: malformed xml, `sentences` is supposed to contain `sentence` tags"
		end
	end
end
lib/nlp/stanford.nit:117,2--177,4