nlp :: NLPToken :: defaultinit
# A single word (or punctuation mark) of a `NLPSentence`.
class NLPToken

	# Index of this token in its sentence.
	var index: Int

	# The word as it appears in the original text.
	var word: String

	# Lemma of `word`.
	var lemma: String

	# Offset of the first character of the token in the input.
	var begin_offset: Int

	# Offset of the last character of the token in the input.
	var end_offset: Int

	# Part-of-speech tag.
	var pos: String

	# Build a token from its XML `token` element.
	#
	# The index is read from the first attribute of `xml`; every other field
	# is read from the child element of the matching name.
	#
	# ~~~
	# var xml = """
	# <token id="2">
	# <word>University</word>
	# <lemma>University</lemma>
	# <CharacterOffsetBegin>9</CharacterOffsetBegin>
	# <CharacterOffsetEnd>19</CharacterOffsetEnd>
	# <POS>NNP</POS>
	# </token>""".to_xml["token"].first.as(XMLStartTag)
	#
	# var token = new NLPToken.from_xml(xml)
	# assert token.index == 2
	# assert token.word == "University"
	# assert token.lemma == "University"
	# assert token.begin_offset == 9
	# assert token.end_offset == 19
	# assert token.pos == "NNP"
	# ~~~
	init from_xml(xml: XMLStartTag) do
		# The first attribute is expected to be a string attribute holding the index.
		var id_attr = xml.attributes.first.as(XMLStringAttr)
		var token_index = id_attr.value.to_i
		var form = read_data(xml, "word")
		var base_form = read_data(xml, "lemma")
		var first_char = read_data(xml, "CharacterOffsetBegin").to_i
		var last_char = read_data(xml, "CharacterOffsetEnd").to_i
		var tag = read_data(xml, "POS")
		init(token_index, form, base_form, first_char, last_char, tag)
	end

	# Data of the first `tag_name` element found in `xml`.
	#
	# Returns an empty string when no such element is found, when the first
	# match is not an `XMLStartTag`, or when that element carries no data.
	private fun read_data(xml: XMLStartTag, tag_name: String): String do
		var matches = xml[tag_name]
		if matches.is_empty then return ""
		var node = matches.first
		if not node isa XMLStartTag then return ""
		var content = node.data
		if content == null then return ""
		return content
	end
end
lib/nlp/stanford.nit:239,1--299,3