lib/nlp: avoid crash when reading token XML
author Alexandre Terrasa <alexandre@moz-code.org>
Fri, 22 Sep 2017 20:36:42 +0000 (16:36 -0400)
committer Alexandre Terrasa <alexandre@moz-code.org>
Thu, 12 Oct 2017 00:49:00 +0000 (20:49 -0400)
Signed-off-by: Alexandre Terrasa <alexandre@moz-code.org>

lib/nlp/stanford.nit

index 22153b5..a637d53 100644 (file)
@@ -279,13 +279,23 @@ class NLPToken
        # ~~~
        init from_xml(xml: XMLStartTag) do
                var index = xml.attributes.first.as(XMLStringAttr).value.to_i
-               var word = xml["word"].first.as(XMLStartTag).data
-               var lemma = xml["lemma"].first.as(XMLStartTag).data
-               var begin_offset = xml["CharacterOffsetBegin"].first.as(XMLStartTag).data.to_i
-               var end_offset = xml["CharacterOffsetEnd"].first.as(XMLStartTag).data.to_i
-               var pos = xml["POS"].first.as(XMLStartTag).data
+               var word = read_data(xml, "word")
+               var lemma = read_data(xml, "lemma")
+               var begin_offset = read_data(xml, "CharacterOffsetBegin").to_i
+               var end_offset = read_data(xml, "CharacterOffsetEnd").to_i
+               var pos = read_data(xml, "POS")
                init(index, word, lemma, begin_offset, end_offset, pos)
        end
+
+       private fun read_data(xml: XMLStartTag, tag_name: String): String do # Returns the `data` of the first `xml[tag_name]` entry, or "" when it cannot be read
+               var res = "" # fallback value returned by every guard below
+               if xml[tag_name].is_empty then return res # no entry for `tag_name`: fall back instead of calling `first`
+               var first = xml[tag_name].first
+               if not first isa XMLStartTag then return res # first entry is not an XMLStartTag, so `data` is not read from it
+               var data = first.data
+               if data == null then return res # `data` is null: fall back to ""
+               return data # NOTE(review): from_xml calls `to_i` on this result for the offsets; confirm "" is acceptable there
+       end
 end
 
 # Stanford web server