nlp :: NLPProcessor :: process_files
Returns a map associating each file path with its NLPDocument.
# Processes a list of files in one call (batch mode).
#
# Each path of `paths` is passed to `process_file`, one after the other.
#
# Returns a map associating each given path with the NLPDocument built for it.
fun process_files(paths: Array[String]): Map[String, NLPDocument] do
	var docs = new HashMap[String, NLPDocument]
	for path in paths do docs[path] = process_file(path)
	return docs
end
lib/nlp/stanford.nit:40,2--49,4
# Batch mode.
#
# Writes the paths of `inputs` into a list file, runs the Stanford CoreNLP
# jar once over that list, then builds one NLPDocument per input from the
# XML file expected in `tmp_dir`.
#
# The list file and `tmp_dir` are deleted before returning.
#
# Returns a map of file paths associated with their NLPDocument.
redef fun process_files(inputs) do
	# Prepare the input file list (one path per line)
	var input_file = "inputs.list"
	var fw = new FileWriter.open(input_file)
	for input in inputs do fw.write "{input}\n"
	fw.close

	# Run Stanford NLP jar.
	# The list file and the output directory are double-quoted, like `java_cp`,
	# so that a path containing spaces is not split by the shell.
	sys.system "java -cp \"{java_cp}\" -Xmx2g edu.stanford.nlp.pipeline.StanfordCoreNLP -annotators tokenize,ssplit,pos,lemma -outputFormat xml -filelist \"{input_file}\" -outputDirectory \"{tmp_dir}\""

	# Parse output
	# NOTE(review): the output file is looked up by basename only, so two inputs
	# sharing a basename resolve to the same XML file -- confirm callers never do that.
	var map = new HashMap[String, NLPDocument]
	for input in inputs do
		var out_file = tmp_dir / "{input.basename}.xml"
		map[input] = new NLPDocument.from_xml_file(out_file)
	end

	# Clean up the list file and the output directory
	input_file.file_delete
	tmp_dir.rmdir
	return map
end
lib/nlp/stanford.nit:85,2--106,4